# Clears work environment
rm(list = ls())

# Sets working directory
setwd('C:/Users/Jason/Box Sync/Home Folder jdt34/733 - Maximum Likelihood Estimation/Lai & Slater 2006 replication')

# Loads data and libraries
load(file='cleanedLSdata.RData')
loadPkg(packs)

# Adapts separation plot to actual count data
sepcolor = c('#FFFFFF', '#00BFC4', '#0BCA88', '#16D14C', '#32D723', '#7FDE30', '#C7E43E', '#F19D5C', '#F8766D')
extra = '#EBCC4D'
lsmsort = lsm[with(lsm, order(majorinitone)), ]
rownames(lsmsort) = NULL
lsmsort$index = seq(1, (nrow(lsm) * 5), 5)
lsmsort$min = 0
lsmsort$max = 9
realcount = ggplot(data=lsmsort[which(lsmsort$majorinitone>=1),], aes(x=index, y=majorinitone, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor[2:9]) +
  scale_x_continuous(limits=c((length(which(lsmsort$majorinitone<1)) * 5), (nrow(lsm) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title='Real data: MIDs at least 1') +
  theme(panel.background = element_rect(fill='grey95'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
realzero = ggplot(data=lsmsort[which(lsmsort$majorinitone<1),], aes(x=index, y=majorinitone, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (length(which(lsmsort$majorinitone<1)) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title='Real data: MIDs less than 1') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())

# Adapts separation plot to NB count model
nbpredsort = data.frame(cbind(preds=as.vector(LSpredictor(lsm)), real=lsm$majorinitone))
nbpredsort = nbpredsort[with(nbpredsort, order(preds)), ]
nbpredsort$preds[which(nbpredsort$preds>9)] = 9
rownames(nbpredsort) = NULL
nbpredsort$index = seq(1, (nrow(lsm) * 5), 5)
nbpredsort$min = 0
nbpredsort$max = 9
nbsepplot = ggplot(data=nbpredsort, aes(x=index, y=preds, ymin=min, ymax=max, color=factor(real))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (nrow(lsm) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title="Negative binomial model; six predicted values rounded down to 9") +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
nbsepplotcount = ggplot(data=nbpredsort[which(nbpredsort$preds>=1),], aes(x=index, y=preds, ymin=min, ymax=max, color=factor(real))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange(size=3) +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor[2:9]) +
  scale_x_continuous(limits=c((length(which(nbpredsort$preds<1)) * 5), (nrow(lsm) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title='Negative binomial model: predicted MIDs at least 1 (six extreme predictions rounded to 9)') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
nbsepplotzero = ggplot(data=nbpredsort[which(nbpredsort$preds<1),], aes(x=index, y=preds, ymin=min, ymax=max, color=factor(real))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (length(which(nbpredsort$preds<1)) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title='Negative binomial model: predicted MIDs less than 1') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())

# Adapts separation plot to ZI count model
zipredsort = data.frame(cbind(preds=as.vector(JTpredictor(lsm)), real=lsm$majorinitone))
zipredsort = zipredsort[with(zipredsort, order(preds)), ]
rownames(zipredsort) = NULL
zipredsort$index = seq(1, (nrow(lsm) * 5), 5)
zipredsort$min = 0
zipredsort$max = 9
zisepplot = ggplot(data=zipredsort, aes(x=index, y=preds, ymin=min, ymax=max, color=factor(real))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (nrow(lsm) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title='Zero-inflated negative binomial model') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
zisepplotcount = ggplot(data=zipredsort[which(zipredsort$preds>=1),], aes(x=index, y=preds, ymin=min, ymax=max, color=factor(real))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange(size=3) +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor[2:9]) +
  scale_x_continuous(limits=c((length(which(zipredsort$preds<1)) * 5), (nrow(lsm) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title='Zero-inflated negative binomial model: predicted MIDs at least 1') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
zisepplotzero = ggplot(data=zipredsort[which(zipredsort$preds<1),], aes(x=index, y=preds, ymin=min, ymax=max, color=factor(real))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_hline(y=9, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (length(which(zipredsort$preds<1)) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 9, 1)) +
  guides(color=F) +
  labs(title='Zero-inflated negative binomial model: predicted MIDs less than 1') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())

sepplotGrid2 = arrangeGrob(realzero, nbsepplotzero, zisepplotzero, realcount, nbsepplotcount, zisepplotcount, ncol=1)
ggsave(file='sepplotGrid2.pdf', sepplotGrid2, width=9, height=12, units='in')

save(file='panel3_pseudo-separation_plots.RData', lsmsort, realzero, realcount, nbpredsort, nbsepplotzero, nbsepplotcount, zipredsort, zisepplotzero, zisepplotcount)
